=== PSRM Replication Run Started: 2025-12-09 08:18:27 ===

R version 4.5.1 (2025-06-13)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.7.2

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/New_York
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
[1] compiler_4.5.1    tools_4.5.1       rstudioapi_0.17.1

Working directory:
 /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final 

Files in root directory:
[1] "01_data"                                    "02_code"                                   
[3] "03_output"                                  "04_application"                            
[5] "05_replication_lowande"                     "PSRM_llm_replication_dataverse_final.Rproj"
[7] "README.txt"                                 "replication.log"                           



>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure1.R @ 2025-12-09 08:18:27 


> library(ggplot2)

> library(patchwork)

> info_expert <- read.csv("05_replication_lowande/expert_tab.csv")

> colnames(info_expert)[3]<- "est"

> colnames(info_expert)[4] <- "se"

> info_expert$topic_ord <- rank(info_expert$est, ties.method = "min")

> info_nonexpert <- read.csv("05_replication_lowande/nonexpert_tab.csv") 

> colnames(info_nonexpert)[3]<- "est"

> colnames(info_nonexpert)[4] <- "se"

> #'Estimated Discretion'
> 
> exp_bs=ggplot(info_expert) + geom_point(aes(x = factor(info_expert$topic_name, levels = info_expert$topic_name[order(info_expert$est, decreasing = FALSE)]), y = est, size= 2)) +
+   geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
+   geom_linerange(aes(x = topic_name, ymin = est-(1.645*se),ymax = est+(1.645*se)),lwd = 1.5, position = position_dodge(width = 1/2)) +
+   geom_linerange(aes(x = topic_name, ymin = est-(1.96*se),ymax = est+(1.96*se)),lwd = 1, position = position_dodge(width = 1/2)) +
+   coord_flip() + theme_bw() + xlab('') + ylab('Expert Coding') +
+   scale_y_continuous(limits=c(-3,3.7)) +
+   theme(legend.title=element_blank(),legend.position='none')

> # ---------------------------
> nonexp_bs=ggplot(info_nonexpert ) + geom_point(aes(x = factor(info_expert$topic_name, levels = info_expert$topic_name[order(info_expert$est, decreasing = FALSE)]), y = est, size= 2)) +
+   geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
+   geom_linerange(aes(x = topic_name, ymin = est-(1.645*se),ymax = est+(1.645*se)),lwd = 1.5, position = position_dodge(width = 1/2)) +
+   geom_linerange(aes(x = topic_name, ymin = est-(1.96*se),ymax = est+(1.96*se)),lwd = 1, position = position_dodge(width = 1/2)) +
+   coord_flip() + theme_bw() + xlab('') + ylab('Non-Expert Coding') +
+   scale_y_continuous(limits=c(-3,3.7)) +
+   theme(legend.title=element_blank(),legend.position='none')

> merged_df <- merge(info_expert, info_nonexpert, by = "topic_name")

> merged_df$difference <- abs(merged_df$est.x - merged_df$est.y)

> topicsdiff <- subset(merged_df, difference > 1)

> red_topics <- topicsdiff$topic_name

> info_nonexpert$color_group <- ifelse(info_expert$topic_name %in% red_topics, "red", "black")

> info_nonexpert$topic_name <- factor(info_expert$topic_name, levels = info_expert$topic_name[order(info_expert$est, decreasing = TRUE)])

> nonexp_bs <- ggplot(info_nonexpert) + geom_point(aes(x = factor(info_expert$topic_name, levels = info_expert$topic_name[order(info_expert$est, decreasing = FALSE)]), color = color_group,y = est, size= 2)) +
+   geom_hline(yintercept = 0, colour = gray(0.5), lty = 2) +
+   geom_linerange(aes(x = topic_name, ymin = est - (1.645 * se), ymax = est + (1.645 * se), color = color_group), 
+                  lwd = 1.5, position = position_dodge(width = 0.5)) +
+   geom_linerange(aes(x = topic_name, ymin = est - (1.96 * se), ymax = est + (1.96 * se), color = color_group), 
+                  lwd = 1, position = position_dodge(width = 0.5)) +
+   coord_flip() + theme_bw() + xlab('') + ylab('Non-Expert Coding') +
+   scale_y_continuous(limits=c(-3,3.7)) +
+   scale_color_manual(values = c("red" = "red", "black" = "black")) +
+   theme(legend.title=element_blank(),legend.position='none')+
+   theme(
+  legend.title = element_blank(),
+  legend.position = 'none',
+  axis.text.y = element_blank(),   
+  axis.title.y = element_blank()   
+  )

> combined_plot <- exp_bs + nonexp_bs

> ggsave(
+   filename = "03_output/figure1.pdf",
+   plot     = combined_plot,
+   width    = 10,
+   height   = 8
+ )
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure1.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure2.R @ 2025-12-09 08:18:28 


> require(caret)
G3;Loading required package: caret
gG3;Loading required package: lattice
g
> library(plyr)

> library(dplyr)
G3;
Attaching package: ‘dplyr’

gG3;The following objects are masked from ‘package:plyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise, summarize

gG3;The following objects are masked from ‘package:stats’:

    filter, lag

gG3;The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

g
> library(MLmetrics)
G3;
Attaching package: ‘MLmetrics’

gG3;The following objects are masked from ‘package:caret’:

    MAE, RMSE

gG3;The following object is masked from ‘package:base’:

    Recall

g
> library(ggplot2)

> library(boot)
G3;
Attaching package: ‘boot’

gG3;The following object is masked from ‘package:lattice’:

    melanoma

g
> coder1 <-read.csv("01_data/upwork_annotation/testing_Ashton.csv")

> coder2 <-read.csv("01_data/upwork_annotation/testing_Dawn.csv")

> coder3 <-read.csv("01_data/upwork_annotation/testing_Dmitry.csv")

> coder4 <-read.csv("01_data/upwork_annotation/testing_Michael.csv")

> coder5 <-read.csv("01_data/upwork_annotation/testing_Shea.csv")

> test <- read.csv("01_data/interestgroups_test.csv")

> test <- test %>% dplyr::select(-Security, -Tax)

> davinci <- read.csv("01_data/machine/davinci.csv")

> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder1)[1] <- "Index"

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder2)[1] <- "Index"

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder3)[1] <- "Index"

> colnames(coder4) <- paste0(colnames(coder4), "_coder4") 

> colnames(coder4)[1] <- "Index"

> colnames(coder5) <- paste0(colnames(coder5), "_coder5") 

> colnames(coder5)[1] <- "Index"

> merged <- data.frame(coder1,coder2,coder3,coder4,coder5)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3",
+                                        "Prediction_coder4" ,"Prediction_coder5" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3",
+                                          "Status_quo_coder4" ,"Status_quo_coder5" )])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3",
+                                        "Specifics_coder4" ,"Specifics_coder5" )])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3",
+                                          "Sociotropic_coder4" ,"Sociotropic_coder5" )])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3",
+                                          "Human_right_coder4" ,"Human_right_coder5" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3",
+                                        "Environment_coder4" ,"Environment_coder5" )])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3",
+                                         "Ideology_coder4" ,"Ideology_coder5" )])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3",
+                                       "Interest_groups_coder4" ,"Interest_groups_coder5" )])

> merged <- merged[,c(1,2,51:58)]

> merged$pred_maj <- ifelse(merged$pred_total>=3,1,0)

> merged$status_maj <- ifelse(merged$status_total>=3,1,0)

> merged$sepc_maj <- ifelse(merged$spec_total>=3,1,0)

> merged$socio_maj <- ifelse(merged$socio_total>=3,1,0)

> merged$human_maj <- ifelse(merged$human_total>=3,1,0)

> merged$env_maj <- ifelse(merged$env_total>=3,1,0)

> merged$ideo_maj <- ifelse(merged$ideo_total>=3,1,0)

> merged$ig_maj <- ifelse(merged$ig_total>=3,1,0)

> #majority <- merged[,c(1,59:66)]
> majority <- merged[,c(1,11:18)]

> combined <- plyr::join(test, davinci, by = "Index") ##davinci

> combined1 <- plyr::join(test, coder1, by = "Index")

> combined2 <- plyr::join(test, coder2, by = "Index")

> combined3 <- plyr::join(test, coder3, by = "Index")

> combined4 <- plyr::join(test, coder4, by = "Index")

> combined5 <- plyr::join(test, coder5, by = "Index")

> combined7 <- plyr::join(test, majority, by = "Index") ##majority

> calculate_accuracy <- function(actual_labels, predicted_labels) {
+   # Ensure both vectors have the same length
+   if (length(actual_labels) != length(predicted_labels)) {
+     stop("Input vectors must have the same length.")
+   }
+   
+   # Calculate accuracy
+   correct_predictions <- sum(actual_labels == predicted_labels)
+   total_predictions <- length(actual_labels)
+   accuracy <- correct_predictions / total_predictions
+   
+   return(accuracy)
+ }

> ## For GPT-3
> set.seed(12345)

> # Number of bootstrap samples
> n_bootstrap <- 1000

> data <- combined[,c(4:11, 14:21)]

> # Number of categories
> n_columns_per_pair <- 8

> # Function to calculate accuracy between i-th and (i+8)-th columns for a single bootstrap sample
> calculate_accuracy <- function(bootstrap_sample) {
+   # Calculate accuracy for each pair of columns
+   accuracies <- numeric(n_columns_per_pair)
+   for (i in 1:n_columns_per_pair) {
+     actual_labels <- bootstrap_sample[, i]
+     predicted_labels <- bootstrap_sample[, i + n_columns_per_pair]
+     accuracies[i] <- calculate_accuracy_function(actual_labels, predicted_labels)
+   }
+   
+   # Return the average accuracy across all pairs
+   average_accuracy <- mean(accuracies)
+   return(average_accuracy)
+ }

> # Placeholder accuracy calculation function
> calculate_accuracy_function <- function(actual_labels, predicted_labels) {
+   accuracy <- mean(actual_labels == predicted_labels)
+   return(accuracy)
+ }

> # Bootstrap loop
> results <- replicate(n_bootstrap, {
+   # Sample with replacement for each bootstrap iteration
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   
+   # Calculate accuracy for this bootstrap sample
+   calculate_accuracy(bootstrap_sample)
+ })

> # Calculate confidence intervals (adjust the confidence level as needed)
> confidence_intervals <- quantile(results, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results))
[1] 0.901667

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals)
    2.5%    97.5% 
0.894875 0.908625 

> ## For Majority
> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined7[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results2 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals2 <- quantile(results2, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results2))
[1] 0.8634391

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals2)
     2.5%     97.5% 
0.8555000 0.8715031 

> ##For Coder 1
> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined1[,c(4:11, 13:20)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results3 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals3 <- quantile(results3, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results3))
[1] 0.8102337

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals3)
    2.5%    97.5% 
0.801125 0.819875 

> ##For Coder 2
> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined2[,c(4:11, 13:20)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results4 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals4 <- quantile(results4, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results4))
[1] 0.8012212

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals4)
     2.5%     97.5% 
0.7917469 0.8106250 

> ##For Coder 3
> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined3[,c(4:11, 13:20)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results5 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals5 <- quantile(results5, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results5))
[1] 0.8447651

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals5)
     2.5%     97.5% 
0.8366219 0.8535000 

> ##For Coder 4
> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined4[,c(4:11, 13:20)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results6 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals6 <- quantile(results6, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results6))
[1] 0.8104325

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals6)
     2.5%     97.5% 
0.8010000 0.8193781 

> ##For Coder 5
> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined5[,c(4:11, 13:20)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results7 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals7 <- quantile(results7, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results7))
[1] 0.8393916

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals7)
     2.5%     97.5% 
0.8303750 0.8476281 

> ##Roberta Prediction
> roberta<- read.csv("./01_data/machine/robertapredictions.csv")

> colnames(roberta) <- paste0(colnames(roberta), "_roberta")

> colnames(roberta)[1] <- "Index"

> combined_roberta <- plyr::join(test, roberta, by = "Index")

> set.seed(12346)

> n_bootstrap <- 1000

> data <- combined_roberta[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results8 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals8 <- quantile(results8, c(0.025, 0.975))

> mean(combined_roberta$Prediction == combined_roberta$Prediction_roberta)
[1] 0.841

> mean(combined_roberta$Status.Quo_roberta == combined_roberta$Status_quo)
[1] 0.697

> mean(combined_roberta$Specifics == combined_roberta$Specifics_roberta)
[1] 0.894

> mean(combined_roberta$Sociotropic == combined_roberta$Sociotropic_roberta)
[1] 0.824

> mean(combined_roberta$Human.rights_roberta == combined_roberta$Human_right)
[1] 0.95

> mean(combined_roberta$Environment == combined_roberta$Enviornment_roberta)
[1] 0.918

> mean(combined_roberta$Ideology == combined_roberta$ideology_roberta)
[1] 0.975

> mean(combined_roberta$Interest_groups == combined_roberta$Interest.Group_roberta)
[1] 0.895

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results8))
[1] 0.8743594

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals8)
    2.5%    97.5% 
0.867000 0.881625 

> ##Logistic Regression Prediction
> logistic <- read.csv("./01_data/machine/Logisticpredictions.csv")

> colnames(logistic) <- paste0(colnames(logistic), "_roberta")

> colnames(logistic)[1] <- "Index"

> combined_logit <- plyr::join(test,logistic, by = "Index")

> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined_logit[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results9 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals9 <- quantile(results9, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results9))
[1] 0.8056908

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals9)
     2.5%     97.5% 
0.7970000 0.8143781 

> ##Baseline Prediction
> test2 <- test

> test2$Prediction1 <- rep(0,1000)

> test2$Status_quo1 <- rep(0,1000)

> test2$Specifics1 <- rep(0,1000)

> test2$Sociotropic1 <- rep(0,1000)

> test2$Human_right1 <- rep(0,1000)

> test2$Environment1 <- rep(0,1000)

> test2$Ideology1 <- rep(0,1000)

> test2$Interest_groups1 <- rep(0,1000)

> set.seed(12345)

> n_bootstrap <- 1000

> data <- test2[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results10 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals10 <- quantile(results10, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results10))
[1] 0.7476438

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals10)
     2.5%     97.5% 
0.7381219 0.7567531 

> ##Llamma Prediction
> llama <- read.csv("./01_data/machine/llama_prediction.csv")

> colnames(llama) <- paste0(colnames(llama), "_llama ")

> colnames(llama)[1] <- "Index"

> combined_llama <- plyr::join(test,llama, by = "Index")

> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined_llama[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results11 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals11 <- quantile(results11, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results11))
[1] 0.8935762

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals11)
     2.5%     97.5% 
0.8856250 0.9006312 

> ##Mturk_majority Prediction
> coder1 <-read.csv("./01_data/MTURK/annotator1_mturk.csv")

> coder2 <-read.csv("./01_data/MTURK/annotator2_mturk.csv")

> coder3 <-read.csv("./01_data/MTURK/annotator3_mturk.csv")

> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder1)[1] <- "Index"

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder2)[1] <- "Index"

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder3)[1] <- "Index"

> merged <- data.frame(coder1,coder2,coder3)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3")])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3")])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3")])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3")])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3")])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3")])

> merged$pred_maj <- ifelse(merged$pred_total>=2,1,0)

> merged$status_maj <- ifelse(merged$status_total>=2,1,0)

> merged$sepc_maj <- ifelse(merged$spec_total>=2,1,0)

> merged$socio_maj <- ifelse(merged$socio_total>=2,1,0)

> merged$human_maj <- ifelse(merged$human_total>=2,1,0)

> merged$env_maj <- ifelse(merged$env_total>=2,1,0)

> merged$ideo_maj <- ifelse(merged$ideo_total>=2,1,0)

> merged$ig_maj <- ifelse(merged$ig_total>=2,1,0)

> majority <- merged[,c(1,45:52)]

> combined_mturk <- plyr::join(test, majority, by = "Index") ##majority

> set.seed(12345)

> n_bootstrap <- 1000

> data <-combined_mturk[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results_mturk <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals_mturk <- quantile(results_mturk, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results_mturk ))
[1] 0.8527851

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals_mturk)
     2.5%     97.5% 
0.8448750 0.8603781 

> ##GPT4 Prediction
> gpt4 <- read.csv("./01_data/machine/GPT4_predictions_combined_final.csv")

> colnames(gpt4) <- paste0(colnames(gpt4), "_gpt4 ")

> colnames(gpt4)[1] <- "Index"

> combined_gpt4 <- plyr::join(test,gpt4, by = "Index")

> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined_gpt4[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results11 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals11 <- quantile(results11, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results11))
[1] 0.897488

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals11)
     2.5%     97.5% 
0.8902500 0.9045031 

> ##GPT3.5 Prediction
> gpt3.5 <- read.csv("./01_data/machine/GPT3.5_predictions_combined_final.csv")

> colnames(gpt3.5) <- paste0(colnames(gpt3.5), "_gpt3.5 ")

> colnames(gpt3.5)[1] <- "Index"

> combined_gpt3.5 <- plyr::join(test,gpt3.5, by = "Index")

> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined_gpt3.5[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results11 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals11 <- quantile(results11, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results11))
[1] 0.8910861

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals11)
     2.5%     97.5% 
0.8834937 0.8982500 

> ##GPT4 zeroshot Prediction
> gpt4_zero <- read.csv("./01_data/machine/zeroshot_20240806Model.csv")

> colnames(gpt4_zero ) <- paste0(colnames(gpt4_zero ), "_gpt4_zero ")

> colnames(gpt4_zero )[1] <- "Index"

> gpt4_zero <- gpt4_zero[,c(-2,-6,-8)] 

> combined_gpt4_zero  <- plyr::join(test,gpt4_zero , by = "Index")

> set.seed(12345)

> n_bootstrap <- 1000

> data <- combined_gpt4_zero[,c(4:19)]

> n_columns_per_pair <- 8

> # Bootstrap loop
> results11 <- replicate(n_bootstrap, {
+   bootstrap_sample <- data[sample(nrow(data), replace = TRUE), ]
+   calculate_accuracy(bootstrap_sample)
+ })

> confidence_intervals11 <- quantile(results11, c(0.025, 0.975))

> # Print the results
> print("Average Accuracy:")
[1] "Average Accuracy:"

> print(mean(results11))
[1] 0.8119395

> print("Confidence Intervals:")
[1] "Confidence Intervals:"

> print(confidence_intervals11)
     2.5%     97.5% 
0.8026219 0.8205000 

> ################################################################################
> ###################Plot#########################################################
> ################################################################################
> 
> 
> average <- read.csv("01_data/average_10.csv") ## Average accuracy over the eight categories

> average <- average[order(average$Accuracy_average,decreasing=TRUE, na.last=FALSE),]

> average$Coder <- factor(average$Coder, levels = c("SFT GPT-3","SFT GPT-4" , "SFT Llama","SFT GPT-3.5", "RoBERTa" ,"Majority","Majority(Mturk)" ,"Coder 3", "Coder 5", 
+                                                   "GPT-4 Zero-shot","Coder 4",  "Coder 1", "BOW Logit", "Coder 2" ,"Modal Class"))

> p <- ggplot(average, aes(average$Coder,average$Accuracy_average)) + 
+   geom_point(size = 3, color = c("#1b9e77","#1b9e77","#1b9e77","#1b9e77","#1b9e77","#d95f02","#d95f02","#fc8d62","#fc8d62","#1b9e77", "#fc8d62","#fc8d62","#1b9e77", "#fc8d62","#7570b3")) +
+   geom_errorbar(aes(ymin=average$lower, ymax=average$upper), size=1,width = 0.1, color = c("#1b9e77","#1b9e77","#1b9e77","#1b9e77","#1b9e77","#d95f02","#d95f02","#fc8d62","#fc8d62","#1b9e77", "#fc8d62","#fc8d62","#1b9e77", "#fc8d62","#7570b3")) +
+   labs(x="", y="Average Accuracy") +
+   geom_hline(yintercept=0.74760, size =.4,linetype= "dotted") +
+   theme_bw()+ 
+   theme(axis.line = element_line(colour = "grey"),panel.grid.major = element_blank(), panel.border = element_blank(),panel.grid.minor = element_blank())

> ggsave(
+   filename = "03_output/figure2.pdf",
+   plot     = p,
+   width    = 12,
+   height   = 6
+ )
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure2.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure3.R @ 2025-12-09 08:18:33 


> library(reshape2)

> library(MLmetrics)

> test <- read.csv("01_data/interestgroups_test.csv")

> davinci <- read.csv("01_data/machine/davinci.csv")

> test <- test %>% dplyr::select(-Security, -Tax)

> test$informative <- ifelse(rowSums(test[, c("Prediction", "Status_quo", "Specifics")]) > 0, 1, 0)

> test$persuasive <- ifelse(rowSums(test[, c("Sociotropic", "Human_right", "Environment", "Ideology", "Interest_groups")]) > 0, 1, 0)

> davinci$informative <- ifelse(rowSums(davinci[, c("Davinci_pred", "Davinci_status", "Davinci_specifics")]) > 0, 1, 0)

> davinci$persuasive <- ifelse(rowSums(davinci[, c("Davinci_socio", "Davinci_human", "Davinci_env", "Davinci_ideo", "Davinci_ig")]) > 0, 1, 0)

> y <- c()

> for(i in 1:2){
+   precision <- Precision(test[, i+11], davinci[, i+11], positive = 1)
+   recall <- Recall(test[, i+11],davinci[, i+11], positive = 1)
+   f1 <- F1_Score(test[, i+11], davinci[, i+11], positive = 1)
+   accuracy<- Accuracy(test[, i+11],davinci[, i+11])
+   tn <- table(test[, i+11], davinci[, i+11])[1]
+   fn <- table(test[, i+11], davinci[, i+11])[2]
+   fp <- table(test[, i+11], davinci[, i+11])[3]
+   tp <- table(test[, i+11], davinci[, i+11])[4]
+   my_vector <- c(tn,fn, fp,tp, precision, recall, f1, accuracy, as.character("aggregate"))
+   y <- as.data.frame(rbind(y, my_vector ))
+ }

> colnames(y) <- c("tn","fn", "fp", "tp",  "precision", "recall", "f1", "accuracy", "coder")

> y$category <- rep(c("Informative", "Association"),1)

> y <- as.data.frame(y)

> molted <- melt(y,id.vars=c("category"))

> molted <- molted[c(9:16),]

> molted$value <- signif(as.numeric(molted$value), digits = 3)

> molted <- dcast(molted,variable~category)

> molted$variable <- c("Precision","Recall","F1", "Accuracy")

> molted <- molted[, c(1,3,2)]

> par(mar = c(6, 4, 4, 2)) 

> pdf("03_output/figure3.pdf", width = 7, height = 5)

> barplot(
+   t(`row.names<-`(as.matrix(molted[-1]), molted$variable)),
+   legend = TRUE,
+   beside = TRUE,
+   col = c("#1b9e77", "#d95f02"),
+   ylim = c(0, 1),
+   args.legend = list(
+     x = "bottom",
+     horiz = TRUE,
+     inset = c(0, -0.25),  
+     box.lwd = 1           
+   )
+ )

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure3.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure4.R @ 2025-12-09 08:18:33 


> library(dplyr)

> library(tidyr)
G3;
Attaching package: ‘tidyr’

gG3;The following object is masked from ‘package:reshape2’:

    smiths

g
> library(dsl)
G3;dsl v0.1.0 successfully loaded. See ?dsl for help. Note this is an early alpha release and backwards compatability may not be maintained.
g
> library(reshape2)

> ##############################################################
> ###################Chamber plot###############################
> ##############################################################
> 
> chamber_merged <- read.csv("04_application/Chamber_merged.csv")

> chamber_merged$Category <- factor(chamber_merged$Category, levels = c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human Right", "Environment", 
+                                                   "Ideology", "Interest groups", "Informative", "Associative"))

> levels(chamber_merged$Category) <- c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human Right", "Environment", 
+                                      "Ideology", "Interest groups", "Informative", "Associative")

> new_category_names <- c("Prediction", "Status-quo", "Specifics" , "Sociotropic",  "Human Right", "Environment",  "Ideology","Interest groups", "Informative", "Associative")

> reshaped_df <- chamber_merged %>%
+   dplyr::group_by(Category, doc) %>%
+   dplyr::summarise(count = sum(count), .groups = "drop") %>%
+   tidyr::pivot_wider(
+     names_from  = "doc",
+     values_from = "count",
+     values_fill = 0
+   )

> bar_matrix <- t(`row.names<-`(as.matrix(reshaped_df[, -1]), reshaped_df$Category))

> bar_colors <- c("#1b9e77", "#d95f02")  

> pdf("03_output/figure4a.pdf", width = 20, height = 8)

> bp<- barplot(
+   t(`row.names<-`(as.matrix(reshaped_df[, -1]), reshaped_df$Category)), 
+   beside = TRUE, 
+   col = c("#1b9e77", "#d95f02"), 
+   ylim = c(0, max(reshaped_df[, -1], na.rm = TRUE) + 10), 
+   legend = TRUE, # Add legend
+   args.legend = list(x = "bottom", horiz = TRUE, inset = c(-.7, -0.15),
+                      x.intersp = 0.5, 
+                      y.intersp = 0.5), 
+   names.arg = new_category_names,
+   cex.axis = 0.7) 

> bp_positions <- as.vector(bp)

> ordered_errors <- chamber_merged %>%
+   arrange(factor(Category, levels = levels(chamber_merged$Category)))

> print(data.frame(Category = ordered_errors$Category, Xpos = bp_positions))
          Category Xpos
1       Prediction  1.5
2       Prediction  2.5
3       Status-quo  4.5
4       Status-quo  5.5
5        Specifics  7.5
6        Specifics  8.5
7      Sociotropic 10.5
8      Sociotropic 11.5
9      Human Right 13.5
10     Human Right 14.5
11     Environment 16.5
12     Environment 17.5
13        Ideology 19.5
14        Ideology 20.5
15 Interest groups 22.5
16 Interest groups 23.5
17     Informative 25.5
18     Informative 26.5
19     Associative 28.5
20     Associative 29.5

> if (length(bp_positions) != nrow(ordered_errors)) {
+   stop("Error: Mismatch between bar positions and error bars. Check dataset alignment.")
+ }

> arrows(
+   x0 = bp_positions, x1 = bp_positions,  
+   y0 = ordered_errors$CI.Lower, y1 = ordered_errors$CI.Upper,  
+   angle = 90, code = 3, length = 0.05, col = "black"
+ )

> dev.off()
RStudioGD 
        2 

> ##############################################################
> ###################Chamber fitted plot###############################
> ##############################################################
> 
> chamber_merged <- read.csv("04_application/Chamber_merged_fitted.csv")

> chamber_merged$Category <- factor(chamber_merged$Category, levels = c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human Right", "Environment", 
+                                                                       "Ideology", "Interest groups", "Informative", "Associative"))

> levels(chamber_merged$Category) <- c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human Right", "Environment", 
+                                      "Ideology", "Interest groups", "Informative", "Associative")

> new_category_names <- c("Prediction", "Status-quo", "Specifics" , "Sociotropic",  "Human Right", "Environment",  "Ideology","Interest groups", "Informative", "Associative")

> reshaped_df <- chamber_merged %>%
+   dplyr::group_by(Category, doc) %>%
+   dplyr::summarise(count = sum(count), .groups = "drop") %>%
+   tidyr::pivot_wider(
+     names_from = doc,
+     values_from = count,
+     values_fill = 0
+   )

> bar_matrix <- t(`row.names<-`(as.matrix(reshaped_df[, -1]), reshaped_df$Category))

> bar_colors <- c("#1b9e77", "#d95f02")  

> pdf("03_output/figure4b.pdf", width = 20, height = 8)

> bp<- barplot(
+   t(`row.names<-`(as.matrix(reshaped_df[, -1]), reshaped_df$Category)), 
+   beside = TRUE, 
+   col = c("#1b9e77", "#d95f02"), 
+   ylim = c(0, max(reshaped_df[, -1], na.rm = TRUE) + 10), 
+   legend = TRUE, # Add legend
+   args.legend = list(x = "bottom", horiz = TRUE, inset = c(-.7, -0.15),
+                      x.intersp = 0.5, 
+                      y.intersp = 0.5), 
+   names.arg = new_category_names,
+   cex.axis = 0.7) 

> bp_positions <- as.vector(bp)

> ordered_errors <- chamber_merged %>%
+   arrange(factor(Category, levels = levels(chamber_merged$Category)))

> print(data.frame(Category = ordered_errors$Category, Xpos = bp_positions))
          Category Xpos
1       Prediction  1.5
2       Prediction  2.5
3       Status-quo  4.5
4       Status-quo  5.5
5        Specifics  7.5
6        Specifics  8.5
7      Sociotropic 10.5
8      Sociotropic 11.5
9      Human Right 13.5
10     Human Right 14.5
11     Environment 16.5
12     Environment 17.5
13        Ideology 19.5
14        Ideology 20.5
15 Interest groups 22.5
16 Interest groups 23.5
17     Informative 25.5
18     Informative 26.5
19     Associative 28.5
20     Associative 29.5

> if (length(bp_positions) != nrow(ordered_errors)) {
+   stop("Error: Mismatch between bar positions and error bars. Check dataset alignment.")
+ }

> arrows(
+   x0 = bp_positions, x1 = bp_positions,  
+   y0 = ordered_errors$CI.Lower, y1 = ordered_errors$CI.Upper,  
+   angle = 90, code = 3, length = 0.05, col = "black"
+ )

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure4.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure5.R @ 2025-12-09 08:18:33 


> library(dplyr)

> library(tidyr)

> library(dsl)

> library(reshape2)

> #################################################
> ###################USTR plot#####################
> #################################################
> 
> ustr_merged <- read.csv("04_application/USTR_merged.csv")

> ustr_merged$Category <- factor(ustr_merged$Category, levels = c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human Right", "Environment", 
+                                                                 "Ideology", "Interest groups", "Informative", "Associative"))

> levels(ustr_merged$Category) <- c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human Right", "Environment", 
+                                   "Ideology", "Interest groups", "Informative", "Associative")

> new_category_names <- c("Prediction", "Status-quo", "Specifics" , "Sociotropic",  "Human Right", "Environment",  "Ideology","Interest groups", "Informative", "Associative")

> reshaped_df <- ustr_merged %>%
+   dplyr::group_by(Category, doc) %>%
+   dplyr::summarise(count = sum(count), .groups = "drop") %>%
+   tidyr::pivot_wider(
+     names_from  = "doc",
+     values_from = "count",
+     values_fill = 0
+   )

> bar_matrix <- t(`row.names<-`(as.matrix(reshaped_df[, -1]), reshaped_df$Category))

> bar_colors <- c("#1b9e77", "#d95f02")  

> pdf("03_output/figure5a.pdf", width = 20, height = 8)

> bp<- barplot(
+   t(`row.names<-`(as.matrix(reshaped_df[, -1]), reshaped_df$Category)), 
+   beside = TRUE, 
+   col = c("#1b9e77", "#d95f02"), 
+   ylim = c(0, max(reshaped_df[, -1], na.rm = TRUE) + 10), 
+   legend = TRUE, # Add legend
+   args.legend = list(x = "bottom", horiz = TRUE, inset = c(-.7, -0.15),
+                      x.intersp = 0.5, 
+                      y.intersp = 0.5), 
+   names.arg = new_category_names,
+   cex.axis = 0.7) 

> bp_positions <- as.vector(bp)

> ordered_errors <- ustr_merged%>%
+   arrange(factor(Category, levels = levels(ustr_merged$Category)))

> print(data.frame(Category = ordered_errors$Category, Xpos = bp_positions))
          Category Xpos
1       Prediction  1.5
2       Prediction  2.5
3       Status-quo  4.5
4       Status-quo  5.5
5        Specifics  7.5
6        Specifics  8.5
7      Sociotropic 10.5
8      Sociotropic 11.5
9      Human Right 13.5
10     Human Right 14.5
11     Environment 16.5
12     Environment 17.5
13        Ideology 19.5
14        Ideology 20.5
15 Interest groups 22.5
16 Interest groups 23.5
17     Informative 25.5
18     Informative 26.5
19     Associative 28.5
20     Associative 29.5

> if (length(bp_positions) != nrow(ordered_errors)) {
+   stop("Error: Mismatch between bar positions and error bars. Check dataset alignment.")
+ }

> arrows(
+   x0 = bp_positions, x1 = bp_positions,  
+   y0 = ordered_errors$CI.Lower, y1 = ordered_errors$CI.Upper,  
+   angle = 90, code = 3, length = 0.05, col = "black"
+ )

> dev.off()
RStudioGD 
        2 

> ########################################################
> ###################USTR fitted plot#####################
> ########################################################
> 
> 
> ustr_merged <- read.csv("04_application/USTR_merged_fitted.csv")

> ustr_merged$Category <- factor(ustr_merged$Category, levels = c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human Right", "Environment", 
+                                                                 "Ideology", "Interest groups", "Informative", "Associative"))

> levels(ustr_merged$Category) <- c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human Right", "Environment", 
+                                   "Ideology", "Interest groups", "Informative", "Associative")

> new_category_names <- c("Prediction", "Status-quo", "Specifics" , "Sociotropic",  "Human Right", "Environment",  "Ideology","Interest groups", "Informative", "Associative")

> reshaped_df <- ustr_merged %>%
+   dplyr::group_by(Category, doc) %>%
+   dplyr::summarise(count = sum(count), .groups = "drop") %>%
+   tidyr::pivot_wider(
+     names_from  = "doc",
+     values_from = "count",
+     values_fill = 0
+   )

> bar_matrix <- t(`row.names<-`(as.matrix(reshaped_df[, -1]), reshaped_df$Category))

> bar_colors <- c("#1b9e77", "#d95f02")  # Colors for different document types

> pdf("03_output/figure5b.pdf", width = 20, height = 8)

> bp<- barplot(
+   t(`row.names<-`(as.matrix(reshaped_df[, -1]), reshaped_df$Category)), 
+   beside = TRUE, 
+   col = c("#1b9e77", "#d95f02"),
+   ylim = c(0, max(reshaped_df[, -1], na.rm = TRUE) + 10), 
+   legend = TRUE, # Add legend
+   args.legend = list(x = "bottom", horiz = TRUE, inset = c(-.7, -0.15),
+                      x.intersp = 0.5, 
+                      y.intersp = 0.5), 
+   names.arg = new_category_names,
+   cex.axis = 0.7) 

> bp_positions <- as.vector(bp)

> ordered_errors <- ustr_merged%>%
+   arrange(factor(Category, levels = levels(ustr_merged$Category)))

> print(data.frame(Category = ordered_errors$Category, Xpos = bp_positions))
          Category Xpos
1       Prediction  1.5
2       Prediction  2.5
3       Status-quo  4.5
4       Status-quo  5.5
5        Specifics  7.5
6        Specifics  8.5
7      Sociotropic 10.5
8      Sociotropic 11.5
9      Human Right 13.5
10     Human Right 14.5
11     Environment 16.5
12     Environment 17.5
13        Ideology 19.5
14        Ideology 20.5
15 Interest groups 22.5
16 Interest groups 23.5
17     Informative 25.5
18     Informative 26.5
19     Associative 28.5
20     Associative 29.5

> if (length(bp_positions) != nrow(ordered_errors)) {
+   stop("Error: Mismatch between bar positions and error bars. Check dataset alignment.")
+ }

> arrows(
+   x0 = bp_positions, x1 = bp_positions,  
+   y0 = ordered_errors$CI.Lower, y1 = ordered_errors$CI.Upper,  
+   angle = 90, code = 3, length = 0.05, col = "black"
+ )

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figure5.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureC1.R @ 2025-12-09 08:18:33 


> library(plotly)
G3;
Attaching package: ‘plotly’

gG3;The following objects are masked from ‘package:plyr’:

    arrange, mutate, rename, summarise

gG3;The following object is masked from ‘package:ggplot2’:

    last_plot

gG3;The following object is masked from ‘package:stats’:

    filter

gG3;The following object is masked from ‘package:graphics’:

    layout

g
> library(webshot2)

> #remotes::install_github("rstudio/webshot2")
> 
> save_plotly_pdf <- function(plot, pdf_path, html_path = NULL) {
+   if (is.null(html_path)) {
+     html_path <- tempfile(fileext = ".html")
+   }
+   
+   htmlwidgets::saveWidget(as_widget(plot),
+                           file = html_path,
+                           selfcontained = TRUE)
+   
+   webshot2::webshot(
+     url = html_path,
+     file = pdf_path,
+     zoom = 2
+   )
+   
+   message("Saved PDF to: ", pdf_path)
+ }

> fewshot<- read.csv("01_data/fewshot/fewshot_prediction.csv")

> fewshot$X <- c(110, 145,166,187,200)

> fewshot$X <- as.character(fewshot$X)

> fig <- plot_ly(
+   x = ~fewshot$X,
+   y = ~fewshot$F1,
+   type = 'bar',
+   text = ~c(0, 0.066,0.306,0.554,0.494),
+   textposition = 'outside')

> fig<- fig %>% layout(title = '', xaxis = list(title = 'Number of Paragraphs Used in Few-Shot Learning'), 
+                yaxis = list(title = 'F1 score', range = list(0, 1)), legend = list(title=list(text='<b> Few Shot </b>'),
+                                                                              orientation = "h",
+                                                                              xanchor = "center",
+                                                                              x = 0.5))

> save_plotly_pdf(fig, "03_output/figureC1.pdf")
G3;file:////private/var/folders/jp/bwz6mdls5cqgkchbjw_d8dnm0000gq/T/RtmpXOks4A/file17d2f76e064db.html screenshot completed
gG3;Saved PDF to: 03_output/figureC1.pdf
g<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureC1.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD1.R @ 2025-12-09 08:18:35 


> test <- read.csv("01_data/interestgroups_test.csv")

> test <- subset(test, select = -c(X, Security, Tax))

> coder1 <- read.csv("01_data/upwork_annotation/testing_Ashton.csv")

> coder2 <- read.csv("01_data/upwork_annotation/testing_Dawn.csv")

> coder3 <- read.csv("01_data/upwork_annotation/testing_Dmitry.csv")

> coder4 <- read.csv("01_data/upwork_annotation/testing_Michael.csv")

> coder5 <- read.csv("01_data/upwork_annotation/testing_Shea.csv")

> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder1)[1] <- "Index"

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder2)[1] <- "Index"

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder3)[1] <- "Index"

> colnames(coder4) <- paste0(colnames(coder4), "_coder4") 

> colnames(coder4)[1] <- "Index"

> colnames(coder5) <- paste0(colnames(coder5), "_coder5") 

> colnames(coder5)[1] <- "Index"

> merged <- data.frame(coder1,coder2,coder3,coder4,coder5)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3",
+                                        "Prediction_coder4" ,"Prediction_coder5" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3",
+                                          "Status_quo_coder4" ,"Status_quo_coder5" )])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3",
+                                        "Specifics_coder4" ,"Specifics_coder5" )])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3",
+                                          "Sociotropic_coder4" ,"Sociotropic_coder5" )])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3",
+                                          "Human_right_coder4" ,"Human_right_coder5" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3",
+                                        "Environment_coder4" ,"Environment_coder5" )])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3",
+                                         "Ideology_coder4" ,"Ideology_coder5" )])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3",
+                                       "Interest_groups_coder4" ,"Interest_groups_coder5" )])

> merged <- merged[,c(1,2,51:58)]

> combined <- plyr::join(test, merged, by = "Index")

> pred1<- subset(combined, combined$Prediction ==0)

> status1<- subset(combined, combined$Status_quo ==0)

> spec1 <- subset(combined, combined$Specifics ==0)

> socio1 <- subset(combined, combined$Sociotropic==0)

> env1 <- subset(combined, combined$Environment ==0)

> human1 <- subset(combined, combined$Human_right==0)

> ideo1 <- subset(combined, combined$Ideology ==0)

> ig1 <- subset(combined, combined$Interest_groups ==0)

> pred1 <- pred1 %>% group_by(pred1$pred_total) %>%
+   summarize(n=n())

> status1 <- status1 %>% group_by(status1$status_total) %>%
+   summarize(n=n())

> spec1 <- spec1 %>% group_by(spec1$spec_total) %>%
+   summarize(n=n())

> socio1 <- socio1 %>% group_by(socio1$socio_total) %>%
+   summarize(n=n())

> env1 <- env1 %>% group_by(env1$env_total) %>%
+   summarize(n=n())

> human1 <- human1 %>% group_by(human1$human_total) %>%
+   summarize(n=n())

> ideo1<- ideo1 %>% group_by(ideo1$ideo_total) %>%
+   summarize(n=n())

> ig1 <- ig1  %>% group_by(ig1$ig_total) %>%
+   summarize(n=n())

> pdf("03_output/figureD1.pdf", width = 8, height = 9)

> par(mfrow = c(4, 2))

> par(mfrow=c(4,2))

> barplot(pred1$n, names =pred1$`pred1$pred_total`, xlab = "Prediction")

> barplot(status1$n, names =status1$`status1$status_total`,xlab = "Status-quo")

> barplot(spec1$n, names =spec1$`spec1$spec_total`,xlab = "Specifics")

> barplot(socio1$n, names =socio1$`socio1$socio_total`,xlab = "Sociotropic")

> barplot(env1$n, names = env1$`env1$env_total`,xlab = "Environment")

> barplot(human1$n, names = human1$`human1$human_total`,xlab = "Human right")

> barplot(ideo1$n, names = ideo1$`ideo1$ideo_total`,xlab = "Ideology")

> barplot(ig1$n, names = ig1$`ig1$ig_total`,xlab = "Interest Groups")

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD1.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD2.R @ 2025-12-09 08:18:35 


> test <- read.csv("01_data/interestgroups_test.csv")

> test <- subset(test, select = -c(X, Security, Tax))

> coder1 <- read.csv("01_data/upwork_annotation/testing_Ashton.csv")

> coder2 <- read.csv("01_data/upwork_annotation/testing_Dawn.csv")

> coder3 <- read.csv("01_data/upwork_annotation/testing_Dmitry.csv")

> coder4 <- read.csv("01_data/upwork_annotation/testing_Michael.csv")

> coder5 <- read.csv("01_data/upwork_annotation/testing_Shea.csv")

> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder1)[1] <- "Index"

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder2)[1] <- "Index"

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder3)[1] <- "Index"

> colnames(coder4) <- paste0(colnames(coder4), "_coder4") 

> colnames(coder4)[1] <- "Index"

> colnames(coder5) <- paste0(colnames(coder5), "_coder5") 

> colnames(coder5)[1] <- "Index"

> merged <- data.frame(coder1,coder2,coder3,coder4,coder5)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3",
+                                        "Prediction_coder4" ,"Prediction_coder5" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3",
+                                          "Status_quo_coder4" ,"Status_quo_coder5" )])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3",
+                                        "Specifics_coder4" ,"Specifics_coder5" )])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3",
+                                          "Sociotropic_coder4" ,"Sociotropic_coder5" )])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3",
+                                          "Human_right_coder4" ,"Human_right_coder5" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3",
+                                        "Environment_coder4" ,"Environment_coder5" )])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3",
+                                         "Ideology_coder4" ,"Ideology_coder5" )])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3",
+                                       "Interest_groups_coder4" ,"Interest_groups_coder5" )])

> merged <- merged[,c(1,2,51:58)]

> combined <- plyr::join(test, merged, by = "Index")

> pred1<- subset(combined, combined$Prediction ==1)

> status1<- subset(combined, combined$Status_quo ==1)

> spec1 <- subset(combined, combined$Specifics ==1)

> socio1 <- subset(combined, combined$Sociotropic==1)

> env1 <- subset(combined, combined$Environment ==1)

> human1 <- subset(combined, combined$Human_right==1)

> ideo1 <- subset(combined, combined$Ideology ==1)

> ig1 <- subset(combined, combined$Interest_groups ==1)

> pred1 <- pred1 %>% group_by(pred1$pred_total) %>%
+   summarize(n=n())

> status1 <- status1 %>% group_by(status1$status_total) %>%
+   summarize(n=n())

> spec1 <- spec1 %>% group_by(spec1$spec_total) %>%
+   summarize(n=n())

> socio1 <- socio1 %>% group_by(socio1$socio_total) %>%
+   summarize(n=n())

> env1 <- env1 %>% group_by(env1$env_total) %>%
+   summarize(n=n())

> human1 <- human1 %>% group_by(human1$human_total) %>%
+   summarize(n=n())

> ideo1<- ideo1 %>% group_by(ideo1$ideo_total) %>%
+   summarize(n=n())

> ig1 <- ig1  %>% group_by(ig1$ig_total) %>%
+   summarize(n=n())

> pdf("03_output/figureD2.pdf", width = 8, height = 9)

> par(mfrow = c(4, 2))

> par(mfrow=c(4,2))

> barplot(pred1$n, names =pred1$`pred1$pred_total`, xlab = "Prediction")

> barplot(status1$n, names =status1$`status1$status_total`,xlab = "Status-quo")

> barplot(spec1$n, names =spec1$`spec1$spec_total`,xlab = "Specifics")

> barplot(socio1$n, names =socio1$`socio1$socio_total`,xlab = "Sociotropic")

> barplot(env1$n, names = env1$`env1$env_total`,xlab = "Environment")

> barplot(human1$n, names = human1$`human1$human_total`,xlab = "Human right")

> barplot(ideo1$n, names = ideo1$`ideo1$ideo_total`,xlab = "Ideology")

> barplot(ig1$n, names = ig1$`ig1$ig_total`,xlab = "Interest Groups")

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD2.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD3.R @ 2025-12-09 08:18:35 


> library(dplyr)

> library(plyr) 

> require(caret)

> library(plotly) 

> library(remotes)

> library(webshot2)

> #install.packages("remotes")
> #remotes::install_github("rstudio/webshot2")
> 
> test <- read.csv("01_data/interestgroups_test.csv")

> test <- subset(test, select = -c(X, Security, Tax))

> coder1 <- read.csv("01_data/upwork_annotation/testing_Ashton.csv")

> coder2 <- read.csv("01_data/upwork_annotation/testing_Dawn.csv")

> coder3 <- read.csv("01_data/upwork_annotation/testing_Dmitry.csv")

> coder4 <- read.csv("01_data/upwork_annotation/testing_Michael.csv")

> coder5 <- read.csv("01_data/upwork_annotation/testing_Shea.csv")

> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder1)[1] <- "Index"

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder2)[1] <- "Index"

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder3)[1] <- "Index"

> colnames(coder4) <- paste0(colnames(coder4), "_coder4") 

> colnames(coder4)[1] <- "Index"

> colnames(coder5) <- paste0(colnames(coder5), "_coder5") 

> colnames(coder5)[1] <- "Index"

> merged <- data.frame(coder1,coder2,coder3,coder4,coder5)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3",
+                                        "Prediction_coder4" ,"Prediction_coder5" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3",
+                                          "Status_quo_coder4" ,"Status_quo_coder5" )])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3",
+                                        "Specifics_coder4" ,"Specifics_coder5" )])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3",
+                                          "Sociotropic_coder4" ,"Sociotropic_coder5" )])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3",
+                                          "Human_right_coder4" ,"Human_right_coder5" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3",
+                                        "Environment_coder4" ,"Environment_coder5" )])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3",
+                                         "Ideology_coder4" ,"Ideology_coder5" )])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3",
+                                       "Interest_groups_coder4" ,"Interest_groups_coder5" )])

> merged <- merged[,c(1,2,51:58)]

> merged$pred_maj <- ifelse(merged$pred_total>=3,1,0)

> merged$status_maj <- ifelse(merged$status_total>=3,1,0)

> merged$sepc_maj <- ifelse(merged$spec_total>=3,1,0)

> merged$socio_maj <- ifelse(merged$socio_total>=3,1,0)

> merged$human_maj <- ifelse(merged$human_total>=3,1,0)

> merged$env_maj <- ifelse(merged$env_total>=3,1,0)

> merged$ideo_maj <- ifelse(merged$ideo_total>=3,1,0)

> merged$ig_maj <- ifelse(merged$ig_total>=3,1,0)

> majority <- merged[,c(1,11:18)]

> davinci <- read.csv("01_data/machine/davinci.csv")

> combined  <- plyr::join(davinci, merged,   by = "Index")   # davinci + majority

> combined1 <- plyr::join(test,    coder1,   by = "Index")

> combined2 <- plyr::join(test,    coder2,   by = "Index")

> combined3 <- plyr::join(test,    coder3,   by = "Index")

> combined4 <- plyr::join(test,    coder4,   by = "Index")

> combined5 <- plyr::join(test,    coder5,   by = "Index")

> combined7 <- plyr::join(test,    majority, by = "Index")   # majority only

> categories <- c("Prediction", "Status-quo", "Specifics", "Sociotropic",
+                 "Human/labor Right", "Environment", "Ideology", "Interest groups")

> compute_metrics <- function(truth_df, pred_df, truth_offset, pred_offset, label) {
+   out <- vector("list", length(categories))
+   for (i in seq_along(categories)) {
+     y_true <- truth_df[, i + truth_offset]
+     y_pred <- pred_df[,   i + pred_offset]
+     
+     out[[i]] <- data.frame(
+       precision = Precision(y_true, y_pred, positive = 1),
+       recall    = Recall(y_true,    y_pred, positive = 1),
+       f1        = F1_Score(y_true,  y_pred, positive = 1),
+       accuracy  = Accuracy(y_true,  y_pred),
+       coder     = label,
+       category  = categories[i],
+       stringsAsFactors = FALSE
+     )
+   }
+   dplyr::bind_rows(out)
+ }

> # Davinci vs gold labels in test
> metrics_davinci <- compute_metrics(
+   truth_df     = test,
+   pred_df      = davinci,
+   truth_offset = 2,   # test[, i+2]
+   pred_offset  = 3,   # davinci[, i+3]
+   label        = "Davinci"
+ )

> # Coder 1–5 vs gold labels
> metrics_c1 <- compute_metrics(
+   truth_df     = combined1,
+   pred_df      = combined1,
+   truth_offset = 2,   # combined1[, i+2]
+   pred_offset  = 11,  # combined1[, i+11]
+   label        = "Coder 1"
+ )

> metrics_c2 <- compute_metrics(
+   truth_df     = combined2,
+   pred_df      = combined2,
+   truth_offset = 2,
+   pred_offset  = 11,
+   label        = "Coder 2"
+ )

> metrics_c3 <- compute_metrics(
+   truth_df     = combined3,
+   pred_df      = combined3,
+   truth_offset = 2,
+   pred_offset  = 11,
+   label        = "Coder 3"
+ )

> metrics_c4 <- compute_metrics(
+   truth_df     = combined4,
+   pred_df      = combined4,
+   truth_offset = 2,
+   pred_offset  = 11,
+   label        = "Coder 4"
+ )

> metrics_c5 <- compute_metrics(
+   truth_df     = combined5,
+   pred_df      = combined5,
+   truth_offset = 2,
+   pred_offset  = 11,
+   label        = "Coder 5"
+ )

> # Davinci + Majority (indicator)
> pred    <- ifelse(combined$pred_maj  == 0 & combined$Davinci_pred      == 0, 0, 1)

> status  <- ifelse(combined$status_maj== 0 & combined$Davinci_status    == 0, 0, 1)

> specifics <- ifelse(combined$sepc_maj== 0 & combined$Davinci_specifics == 0, 0, 1)

> socio   <- ifelse(combined$socio_maj == 0 & combined$Davinci_socio     == 0, 0, 1)

> env     <- ifelse(combined$env_maj   == 0 & combined$Davinci_env       == 0, 0, 1)

> human   <- ifelse(combined$human_maj == 0 & combined$Davinci_human     == 0, 0, 1)

> ideo    <- ifelse(combined$ideo_maj  == 0 & combined$Davinci_ideo      == 0, 0, 1)

> ig      <- ifelse(combined$ig_maj    == 0 & combined$Davinci_ig        == 0, 0, 1)

> ind <- as.data.frame(cbind(
+   Index = combined$Index,
+   pred, status, specifics, socio, human, env, ideo, ig
+ ))

> colnames(ind)[-1] <- paste0(categories, "_ind")

> metrics_davinci_maj <- compute_metrics(
+   truth_df     = test,
+   pred_df      = ind,
+   truth_offset = 2,   # test[, i+2]
+   pred_offset  = 1,   # ind[, i+1]
+   label        = "Davinci+Majority"
+ )

> # Majority only
> metrics_maj <- compute_metrics(
+   truth_df     = test,
+   pred_df      = combined7,
+   truth_offset = 2,   # test[, i+2]
+   pred_offset  = 10,  # combined7[, i+10]
+   label        = "Majority"
+ )

> metrics <- bind_rows(
+   metrics_davinci_maj,
+   metrics_davinci,
+   metrics_maj,
+   metrics_c1,
+   metrics_c2,
+   metrics_c3,
+   metrics_c4,
+   metrics_c5
+ )

> metrics$coder <- factor(
+   metrics$coder,
+   levels = c("Davinci+Majority", "Davinci", "Majority",
+              "Coder 1", "Coder 2", "Coder 3", "Coder 4", "Coder 5")
+ )

> metrics$category <- factor(
+   metrics$category,
+   levels = c("Prediction", "Status-quo", "Specifics", "Sociotropic",
+              "Human/labor Right", "Environment", "Ideology", "Interest groups")
+ )

> metrics <- metrics %>%
+   mutate(
+     accuracy  = signif(as.numeric(accuracy),  digits = 3),
+     precision = signif(as.numeric(precision), digits = 3),
+     recall    = signif(as.numeric(recall),    digits = 3),
+     f1        = signif(as.numeric(f1),        digits = 3)
+   )

> save_plotly_pdf <- function(plot, pdf_path, html_path = NULL) {
+   if (is.null(html_path)) {
+     html_path <- tempfile(fileext = ".html")
+   }
+   
+   htmlwidgets::saveWidget(as_widget(plot),
+                           file = html_path,
+                           selfcontained = TRUE)
+   
+   webshot2::webshot(
+     url = html_path,
+     file = pdf_path,
+     zoom = 2
+   )
+   
+   message("Saved PDF to: ", pdf_path)
+ }

> metrics$category <- factor(
+   metrics$category,
+   levels = c(
+     "Prediction",
+     "Status-quo",
+     "Specifics",
+     "Sociotropic",
+     "Human/labor Right",
+     "Environment",
+     "Ideology",
+     "Interest groups"
+   )
+ )

> fig <- plot_ly(x= metrics$category, y=~metrics$accuracy, group=~ metrics$coder,
+                type="scatter", color=~metrics$coder, mode="lines+markers")

> fig<- fig  %>% layout(title = '', xaxis = list(title = 'category'), 
+                 yaxis = list(title = 'Accuracy'),showlegend = T,legend = list(title=list(text='Annotations')))

> save_plotly_pdf(fig, "03_output/figureD3a.pdf")
G3;file:////private/var/folders/jp/bwz6mdls5cqgkchbjw_d8dnm0000gq/T/RtmpXOks4A/file17d2f1b7ab323.html screenshot completed
gG3;Saved PDF to: 03_output/figureD3a.pdf
g
> fig2 <- plot_ly(x= metrics$category, y=~metrics$f1, group=~ metrics$coder,
+                 type="scatter", color=~metrics$coder, mode="lines+markers")

> fig2 <- fig2 %>% layout(title = '', xaxis = list(title = 'category'), 
+                 yaxis = list(title = 'F1'),legend = list(title=list(text='Annotations')))

> save_plotly_pdf(fig2, "03_output/figureD3b.pdf")
G3;file:////private/var/folders/jp/bwz6mdls5cqgkchbjw_d8dnm0000gq/T/RtmpXOks4A/file17d2f7e25d18c.html screenshot completed
gG3;Saved PDF to: 03_output/figureD3b.pdf
g
> fig3 <- plot_ly(x= metrics$category, y=~metrics$recall, group=~ metrics$coder,
+                 type="scatter", color=~metrics$coder,showlegend = T, mode="lines+markers")

> fig3 <-  fig3 %>% layout(title = '', xaxis = list(title = 'category'), 
+                 yaxis = list(title = 'Recall'),legend = list(title=list(text='Annotations')))

> save_plotly_pdf(fig3, "03_output/figureD3c.pdf")
G3;file:////private/var/folders/jp/bwz6mdls5cqgkchbjw_d8dnm0000gq/T/RtmpXOks4A/file17d2f13e1140.html screenshot completed
gG3;Saved PDF to: 03_output/figureD3c.pdf
g
> fig4 <- plot_ly(x= metrics$category, y=~ metrics$precision, group=~ metrics$coder,
+                 type="scatter", color=~metrics$coder, mode="lines+markers")

> fig4 <- fig4 %>% layout(title = '', xaxis = list(title = 'category'), 
+                 yaxis = list(title = 'Precision'),showlegend = T,legend = list(title=list(text='Annotations')))

> save_plotly_pdf(fig4, "03_output/figureD3d.pdf")
G3;file:////private/var/folders/jp/bwz6mdls5cqgkchbjw_d8dnm0000gq/T/RtmpXOks4A/file17d2f16e3f49a.html screenshot completed
gG3;Saved PDF to: 03_output/figureD3d.pdf
g<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD3.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD4.R @ 2025-12-09 08:18:38 


> library(dplyr)

> library(plotly)

> library(remotes)

> library(webshot2)

> save_plotly_pdf <- function(plot, pdf_path, html_path = NULL) {
+   if (is.null(html_path)) {
+     html_path <- tempfile(fileext = ".html")
+   }
+   
+   htmlwidgets::saveWidget(as_widget(plot),
+                           file = html_path,
+                           selfcontained = TRUE)
+   
+   webshot2::webshot(
+     url = html_path,
+     file = pdf_path,
+     zoom = 2
+   )
+   
+   message("Saved PDF to: ", pdf_path)
+ }

> test <- read.csv("01_data/interestgroups_test.csv")

> test <- subset(test, select = -c(X, Security, Tax))

> davinci <- read.csv("01_data/machine/davinci.csv")

> y <- c()

> for(i in 1:8){
+   precision <- Precision(test[, i+2], davinci[, i+3], positive = 1)
+   recall <- Recall(test[, i+2],davinci[, i+3], positive = 1)
+   f1 <- F1_Score(test[, i+2], davinci[, i+3], positive = 1)
+   accuracy<- Accuracy(test[, i+2],davinci[, i+3])
+   tn <- table(test[, i+2], davinci[, i+3])[1]
+   fn <- table(test[, i+2], davinci[, i+3])[2]
+   fp <- table(test[, i+2], davinci[, i+3])[3]
+   tp <- table(test[, i+2], davinci[, i+3])[4]
+   my_vector <- c(tn,fn, fp,tp, precision, recall, f1, accuracy, as.character("Davinci"))
+   y <- as.data.frame(rbind(y, my_vector ))
+ }

> colnames(y) <- c("tn","fn", "fp", "tp",  "precision", "recall", "f1", "accuracy", "coder")

> y$category <- c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human/labor Right", "Environment", 
+                 "Ideology", "Interest groups")

> metrics <- y

> metrics$accuracy <- signif(as.numeric(metrics$accuracy), digits = 3)

> metrics$precision <- signif(as.numeric(metrics$precision), digits = 3)

> metrics$recall <- signif(as.numeric(metrics$recall), digits = 3)

> metrics$f1 <- signif(as.numeric(metrics$f1), digits = 3)

> metrics$category <- factor(metrics$category, levels = c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human/labor Right", "Environment", 
+                                                         "Ideology", "Interest groups"))

> fig <- plot_ly(
+   data = metrics,
+   x = ~category,
+   y = ~f1,
+   type = "bar",
+   showlegend = TRUE,
+   name = "F1",
+   marker = list(color = "rgb(238,121,159)")
+ ) %>%
+   add_trace(
+     y = ~accuracy,
+     name = "Accuracy",
+     marker = list(color = "rgb(238,203,173)")
+   ) %>%
+   add_trace(
+     y = ~precision,
+     name = "Precision",
+     marker = list(color = "rgb(238,169,184)")
+   ) %>%
+   add_trace(
+     y = ~recall,
+     name = "Recall",
+     marker = list(color = "rgb(238,174,238)")
+   ) %>%
+   layout(
+     xaxis = list(title = ""),
+     yaxis = list(title = "Score", range = c(0, 1)),
+     legend = list(
+       title = list(text = "<b> Metrics </b>"),
+       orientation = "h",
+       xanchor = "center",
+       x = 0.5
+     ),
+     barmode = "group"
+   )

> save_plotly_pdf(fig, "03_output/figureD4.pdf")
G3;file:////private/var/folders/jp/bwz6mdls5cqgkchbjw_d8dnm0000gq/T/RtmpXOks4A/file17d2f3af39ece.html screenshot completed
gG3;Saved PDF to: 03_output/figureD4.pdf
g<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD4.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD5.R @ 2025-12-09 08:18:38 


> library(dplyr)

> library(plotly)

> library(remotes)

> library(webshot2)

> require(caret)

> library(plyr)

> #install.packages("remotes")
> #remotes::install_github("rstudio/webshot2")
> 
> save_plotly_pdf <- function(plot, pdf_path, html_path = NULL) {
+   if (is.null(html_path)) {
+     html_path <- tempfile(fileext = ".html")
+   }
+   
+   htmlwidgets::saveWidget(as_widget(plot),
+                           file = html_path,
+                           selfcontained = TRUE)
+   
+   webshot2::webshot(
+     url = html_path,
+     file = pdf_path,
+     zoom = 2
+   )
+   
+   message("Saved PDF to: ", pdf_path)
+ }

> test <- read.csv("01_data/interestgroups_test.csv")

> test <- subset(test, select = -c(X, Security, Tax))

> roberta<- read.csv("./01_data/machine/robertapredictions.csv")

> davinci <- read.csv("01_data/machine/davinci.csv")

> y <- c()

> for(i in 1:8){
+   accuracy<- Accuracy(test[, i+2],davinci[, i+3])
+   my_vector <- c(accuracy, as.character("GPT-3"))
+   #my_vector <- c(f1,as.character("Davinci"))
+   y <- as.data.frame(rbind(y, my_vector ))
+ }

> my_vector <- c()

> y1 <- c()

> for(i in 1:8){
+   accuracy<- Accuracy(test[, i+2],roberta[, i+1])
+   my_vector <- c(accuracy, as.character("RoBERTa"))
+   y1 <- as.data.frame(rbind(y1, my_vector ))
+ }

> y2 <- c()

> combined <- plyr::join(test, majority, by = "Index") 

> for(i in 1:8){
+   accuracy<- Accuracy(test[, i+2], combined[, i+10])
+   my_vector <- c(accuracy, as.character("Majority"))
+   y2 <- as.data.frame(rbind(y2, my_vector ))
+ }

> metrics<- rbind(y, y1, y2)

> colnames(metrics) <- c("accuracy", "coder")

> metrics$category <- rep(c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human/labor Right", "Environment", 
+                           "Ideology", "Interest groups"),3)

> rownames(metrics) <- seq(1:24)

> metrics$accuracy <- signif(as.numeric(metrics$accuracy), digits = 3)

> metrics$category <- factor(metrics$category, levels = c("Prediction", "Status-quo", "Specifics", "Sociotropic", "Human/labor Right", "Environment", 
+                                                         "Ideology", "Interest groups"))

> fig <- plot_ly(x= metrics$category, y=~metrics$accuracy, type="bar",showlegend = T, color = metrics$coder, colors ="Paired")%>% 
+   layout(yaxis = list(title = 'Accuracy',range = c(0,1)),legend = list(title=list(text='<b> Metrics </b>'),
+                                                                          orientation = "h",
+                                                                          xanchor = "center",
+                                                                          x = 0.5) ,barmode = 'group')

> save_plotly_pdf(fig, "03_output/figureD5.pdf")
G3;file:////private/var/folders/jp/bwz6mdls5cqgkchbjw_d8dnm0000gq/T/RtmpXOks4A/file17d2f190153a1.html screenshot completed
gG3;Saved PDF to: 03_output/figureD5.pdf
g<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD5.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD6.R @ 2025-12-09 08:18:39 


> #install.packages("readxl")
> library(readxl)

> test <- read.csv("01_data/machine/chamber_lettertocongress_gpt.csv")

> coder1 <- read_excel("01_data/upwork_annotation/congressletters_week1_ashton.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder2 <- read_excel("01_data/upwork_annotation/congressletters_week1_dawn.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                New names:
• `` -> `...4`

> coder3 <- read_excel("01_data/upwork_annotation/congressletters_week1_dmitry.xls")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                New names:
• `` -> `...4`

> coder4 <- read_excel("01_data/upwork_annotation/congressletters_week1_Mellody.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                New names:
• `` -> `...4`

> coder5 <- read_excel("01_data/upwork_annotation/congressletters_week1_nelson.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                New names:
• `` -> `...4`

> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder1)[1] <- "Index"

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder2)[1] <- "Index"

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder3)[1] <- "Index"

> colnames(coder4) <- paste0(colnames(coder4), "_coder4") 

> colnames(coder4)[1] <- "Index"

> colnames(coder5) <- paste0(colnames(coder5), "_coder5") 

> colnames(coder5)[1] <- "Index"

> merged <- data.frame(coder1,coder2,coder3,coder4,coder5)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3",
+                                        "Prediction_coder4" ,"Prediction_coder5" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3",
+                                          "Status_quo_coder4" ,"Status_quo_coder5" )])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3",
+                                        "Specifics_coder4" ,"Specifics_coder5" )])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3",
+                                          "Sociotropic_coder4" ,"Sociotropic_coder5" )])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3",
+                                          "Human_right_coder4" ,"Human_right_coder5" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3",
+                                        "Environment_coder4" ,"Environment_coder5" )])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3",
+                                         "Ideology_coder4" ,"Ideology_coder5" )])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3",
+                                       "Interest_groups_coder4" ,"Interest_groups_coder5" )])

> merged <- merged[,c(1,4,71:78)]

> combined <- plyr::join(test, merged, by = "Index")

> pred1<- subset(combined, combined$Davinci_pred ==0)

> status1<- subset(combined, combined$Davinci_status ==0)

> spec1 <- subset(combined, combined$Davinci_specifics ==0)

> socio1 <- subset(combined, combined$Davinci_socio==0)

> env1 <- subset(combined, combined$Davinci_env ==0)

> human1 <- subset(combined, combined$Davinci_human==0)

> ideo1 <- subset(combined, combined$Davinci_ideo ==0)

> ig1 <- subset(combined, combined$Davinci_ig ==0)

> pred1 <- pred1 %>% group_by(pred1$pred_total) %>%
+   summarize(n=n())

> status1 <- status1 %>% group_by(status1$status_total) %>%
+   summarize(n=n())

> spec1 <- spec1 %>% group_by(spec1$spec_total) %>%
+   summarize(n=n())

> socio1 <- socio1 %>% group_by(socio1$socio_total) %>%
+   summarize(n=n())

> env1 <- env1 %>% group_by(env1$env_total) %>%
+   summarize(n=n())

> human1 <- human1 %>% group_by(human1$human_total) %>%
+   summarize(n=n())

> ideo1<- ideo1 %>% group_by(ideo1$ideo_total) %>%
+   summarize(n=n())

> ig1 <- ig1  %>% group_by(ig1$ig_total) %>%
+   summarize(n=n())

> pdf("03_output/figureD6.pdf", width = 8, height = 9)

> par(mfrow=c(4,2))

> barplot(pred1$n, names =pred1$`pred1$pred_total`, xlab = "Prediction")

> barplot(status1$n, names =status1$`status1$status_total`,xlab = "Status-quo")

> barplot(spec1$n, names =spec1$`spec1$spec_total`,xlab = "Specifics")

> barplot(socio1$n, names =socio1$`socio1$socio_total`,xlab = "Sociotropic")

> barplot(env1$n, names = env1$`env1$env_total`,xlab = "Environment")

> barplot(human1$n, names = human1$`human1$human_total`,xlab = "Human right")

> barplot(ideo1$n, names = ideo1$`ideo1$ideo_total`,xlab = "Ideology")

> barplot(ig1$n, names = ig1$`ig1$ig_total`,xlab = "Interest Groups")

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD6.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD7.R @ 2025-12-09 08:18:39 


> test <- read.csv("01_data/machine/chamber_pressrelease_gpt.csv")

> coder1 <- read_excel("01_data/upwork_annotation/week2_chamberpress_asthon.xls")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder2 <- read_excel("01_data/upwork_annotation/week2_chamberpress_dawn.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder3 <- read_excel("01_data/upwork_annotation/week2_chamberpress_dmitry.xls")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder4 <- read_excel("01_data/upwork_annotation/week2_chamberpress_Mellody.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder5 <- read_excel("01_data/upwork_annotation/week2_chamberpress_shea.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder1)[1] <- "Index"

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder2)[1] <- "Index"

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder3)[1] <- "Index"

> colnames(coder4) <- paste0(colnames(coder4), "_coder4") 

> colnames(coder4)[1] <- "Index"

> colnames(coder5) <- paste0(colnames(coder5), "_coder5") 

> colnames(coder5)[1] <- "Index"

> merged <- data.frame(coder1,coder2,coder3,coder4,coder5)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3",
+                                        "Prediction_coder4" ,"Prediction_coder5" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3",
+                                          "Status_quo_coder4" ,"Status_quo_coder5" )])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3",
+                                        "Specifics_coder4" ,"Specifics_coder5" )])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3",
+                                          "Sociotropic_coder4" ,"Sociotropic_coder5" )])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3",
+                                          "Human_right_coder4" ,"Human_right_coder5" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3",
+                                        "Environment_coder4" ,"Environment_coder5" )])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3",
+                                         "Ideology_coder4" ,"Ideology_coder5" )])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3",
+                                       "Interest_groups_coder4" ,"Interest_groups_coder5" )])

> merged <- merged[,c(1,2,52:59)]

> combined <- plyr::join(test, merged, by = "Index")

> pred1<- subset(combined, combined$Davinci_pred ==0)

> status1<- subset(combined, combined$Davinci_status ==0)

> spec1 <- subset(combined, combined$Davinci_specifics ==0)

> socio1 <- subset(combined, combined$Davinci_socio==0)

> env1 <- subset(combined, combined$Davinci_env ==0)

> human1 <- subset(combined, combined$Davinci_human==0)

> ideo1 <- subset(combined, combined$Davinci_ideo ==0)

> ig1 <- subset(combined, combined$Davinci_ig ==0)

> pred1 <- pred1 %>% group_by(pred1$pred_total) %>%
+   summarize(n=n())

> status1 <- status1 %>% group_by(status1$status_total) %>%
+   summarize(n=n())

> spec1 <- spec1 %>% group_by(spec1$spec_total) %>%
+   summarize(n=n())

> socio1 <- socio1 %>% group_by(socio1$socio_total) %>%
+   summarize(n=n())

> env1 <- env1 %>% group_by(env1$env_total) %>%
+   summarize(n=n())

> human1 <- human1 %>% group_by(human1$human_total) %>%
+   summarize(n=n())

> ideo1<- ideo1 %>% group_by(ideo1$ideo_total) %>%
+   summarize(n=n())

> ig1 <- ig1  %>% group_by(ig1$ig_total) %>%
+   summarize(n=n())

> pdf("03_output/figureD7.pdf", width = 8, height = 9)

> par(mfrow=c(4,2))

> barplot(pred1$n, names =pred1$`pred1$pred_total`, xlab = "Prediction")

> barplot(status1$n, names =status1$`status1$status_total`,xlab = "Status-quo")

> barplot(spec1$n, names =spec1$`spec1$spec_total`,xlab = "Specifics")

> barplot(socio1$n, names =socio1$`socio1$socio_total`,xlab = "Sociotropic")

> barplot(env1$n, names = env1$`env1$env_total`,xlab = "Environment")

> barplot(human1$n, names = human1$`human1$human_total`,xlab = "Human right")

> barplot(ideo1$n, names = ideo1$`ideo1$ideo_total`,xlab = "Ideology")

> barplot(ig1$n, names = ig1$`ig1$ig_total`,xlab = "Interest Groups")

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD7.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD8.R @ 2025-12-09 08:18:39 


> test <- read.csv("01_data/machine/pressrelease_ustr_gpt.csv")

> coder1 <- read_excel("01_data/upwork_annotation/week3_pressrelease_ashton.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder2 <- read_excel("01_data/upwork_annotation/week3_pressrelease_ustr_dawn.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder3 <- read_excel("01_data/upwork_annotation/week3_pressrelease_ustr_dmitry.xls")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder4 <- read_excel("01_data/upwork_annotation/week3_pressrelease_ustr_michael.xls")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder5 <- read_excel("01_data/upwork_annotation/week3_pressrelease_ustr_Shea.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder1)[1] <- "Index"

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder2)[1] <- "Index"

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder3)[1] <- "Index"

> colnames(coder4) <- paste0(colnames(coder4), "_coder4") 

> colnames(coder4)[1] <- "Index"

> colnames(coder5) <- paste0(colnames(coder5), "_coder5") 

> colnames(coder5)[1] <- "Index"

> merged <- data.frame(coder1,coder2,coder3,coder4,coder5)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3",
+                                        "Prediction_coder4" ,"Prediction_coder5" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3",
+                                          "Status_quo_coder4" ,"Status_quo_coder5" )])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3",
+                                        "Specifics_coder4" ,"Specifics_coder5" )])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3",
+                                          "Sociotropic_coder4" ,"Sociotropic_coder5" )])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3",
+                                          "Human_right_coder4" ,"Human_right_coder5" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3",
+                                        "Environment_coder4" ,"Environment_coder5" )])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3",
+                                         "Ideology_coder4" ,"Ideology_coder5" )])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3",
+                                       "Interest_groups_coder4" ,"Interest_groups_coder5" )])

> merged <- merged[,c(1,3,57:64)]

> combined <- plyr::join(test, merged, by = "Index")

> pred1<- subset(combined, combined$Davinci_pred ==0)

> status1<- subset(combined, combined$Davinci_status ==0)

> spec1 <- subset(combined, combined$Davinci_specifics ==0)

> socio1 <- subset(combined, combined$Davinci_socio==0)

> env1 <- subset(combined, combined$Davinci_env ==0)

> human1 <- subset(combined, combined$Davinci_human==0)

> ideo1 <- subset(combined, combined$Davinci_ideo ==0)

> ig1 <- subset(combined, combined$Davinci_ig ==0)

> pred1 <- pred1 %>% group_by(pred1$pred_total) %>%
+   summarize(n=n())

> status1 <- status1 %>% group_by(status1$status_total) %>%
+   summarize(n=n())

> spec1 <- spec1 %>% group_by(spec1$spec_total) %>%
+   summarize(n=n())

> socio1 <- socio1 %>% group_by(socio1$socio_total) %>%
+   summarize(n=n())

> env1 <- env1 %>% group_by(env1$env_total) %>%
+   summarize(n=n())

> human1 <- human1 %>% group_by(human1$human_total) %>%
+   summarize(n=n())

> ideo1<- ideo1 %>% group_by(ideo1$ideo_total) %>%
+   summarize(n=n())

> ig1 <- ig1  %>% group_by(ig1$ig_total) %>%
+   summarize(n=n())

> pdf("03_output/figureD8.pdf", width = 8, height = 9)

> par(mfrow=c(4,2))

> barplot(pred1$n, names =pred1$`pred1$pred_total`, xlab = "Prediction")

> barplot(status1$n, names =status1$`status1$status_total`,xlab = "Status-quo")

> barplot(spec1$n, names =spec1$`spec1$spec_total`,xlab = "Specifics")

> barplot(socio1$n, names =socio1$`socio1$socio_total`,xlab = "Sociotropic")

> barplot(env1$n, names = env1$`env1$env_total`,xlab = "Environment")

> barplot(human1$n, names = human1$`human1$human_total`,xlab = "Human right")

> barplot(ideo1$n, names = ideo1$`ideo1$ideo_total`,xlab = "Ideology")

> barplot(ig1$n, names = ig1$`ig1$ig_total`,xlab = "Interest Groups")

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD8.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD9.R @ 2025-12-09 08:18:39 


> test <- read.csv("01_data/machine/congressionalhearings_ustr_gpt.csv")

> coder1 <- read_excel("01_data/upwork_annotation/week4_congressionalhearings_ustr_ashton.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder2 <- read_excel("01_data/upwork_annotation/week4_congressionalhearings_ustr_dawn.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder3 <- read_excel("01_data/upwork_annotation/week4_congressionalhearings_ustr_dmitry.xls")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder4 <- read_excel("01_data/upwork_annotation/week4_congressional hearings_ustr_michael.xls")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> coder5 <- read_excel("01_data/upwork_annotation/week4_congressional hearings_ustr_shea.xlsx")
-/                                                                                                                                /                                                                                                                                -                                                                                                                                
> colnames(coder1) <- paste0(colnames(coder1), "_coder1")

> colnames(coder2) <- paste0(colnames(coder2), "_coder2") 

> colnames(coder3) <- paste0(colnames(coder3), "_coder3") 

> colnames(coder4) <- paste0(colnames(coder4), "_coder4") 

> colnames(coder5) <- paste0(colnames(coder5), "_coder5") 

> merged <- cbind(coder1,coder2,coder3,coder4,coder5)

> merged$pred_total<- rowSums(merged[, c("Prediction_coder1" ,"Prediction_coder2" ,"Prediction_coder3",
+                                        "Prediction_coder4" ,"Prediction_coder5" )])

> merged$status_total<- rowSums(merged[, c("Status_quo_coder1" ,"Status_quo_coder2" ,"Status_quo_coder3",
+                                          "Status_quo_coder4" ,"Status_quo_coder5" )])

> merged$spec_total<- rowSums(merged[, c("Specifics_coder1" ,"Specifics_coder2" ,"Specifics_coder3",
+                                        "Specifics_coder4" ,"Specifics_coder5" )])

> merged$socio_total <- rowSums(merged[, c("Sociotropic_coder1" ,"Sociotropic_coder2" ,"Sociotropic_coder3",
+                                          "Sociotropic_coder4" ,"Sociotropic_coder5" )])

> merged$human_total <- rowSums(merged[, c("Human_right_coder1" ,"Human_right_coder2" ,"Human_right_coder3",
+                                          "Human_right_coder4" ,"Human_right_coder5" )])

> merged$env_total <- rowSums(merged[, c("Environment_coder1" ,"Environment_coder2" ,"Environment_coder3",
+                                        "Environment_coder4" ,"Environment_coder5" )])

> merged$ideo_total <- rowSums(merged[, c("Ideology_coder1" ,"Ideology_coder2" ,"Ideology_coder3",
+                                         "Ideology_coder4" ,"Ideology_coder5" )])

> merged$ig_total <- rowSums(merged[, c("Interest_groups_coder1" ,"Interest_groups_coder2" ,"Interest_groups_coder3",
+                                       "Interest_groups_coder4" ,"Interest_groups_coder5" )])

> merged <- merged[,c(1,2,52:59)]

> combined <- cbind(test, merged)

> pred1<- subset(combined, combined$Davinci_pred ==0)

> status1<- subset(combined, combined$Davinci_status ==0)

> spec1 <- subset(combined, combined$Davinci_specifics ==0)

> socio1 <- subset(combined, combined$Davinci_socio==0)

> env1 <- subset(combined, combined$Davinci_env ==0)

> human1 <- subset(combined, combined$Davinci_human==0)

> ideo1 <- subset(combined, combined$Davinci_ideo ==0)

> ig1 <- subset(combined, combined$Davinci_ig ==0)

> pred1 <- pred1 %>% group_by(pred1$pred_total) %>%
+   summarize(n=n())

> status1 <- status1 %>% group_by(status1$status_total) %>%
+   summarize(n=n())

> spec1 <- spec1 %>% group_by(spec1$spec_total) %>%
+   summarize(n=n())

> socio1 <- socio1 %>% group_by(socio1$socio_total) %>%
+   summarize(n=n())

> env1 <- env1 %>% group_by(env1$env_total) %>%
+   summarize(n=n())

> human1 <- human1 %>% group_by(human1$human_total) %>%
+   summarize(n=n())

> ideo1<- ideo1 %>% group_by(ideo1$ideo_total) %>%
+   summarize(n=n())

> ig1 <- ig1  %>% group_by(ig1$ig_total) %>%
+   summarize(n=n())

> pdf("03_output/figureD9.pdf", width = 8, height = 9)

> par(mfrow=c(4,2))

> barplot(pred1$n, names =pred1$`pred1$pred_total`, xlab = "Prediction")

> barplot(status1$n, names =status1$`status1$status_total`,xlab = "Status-quo")

> barplot(spec1$n, names =spec1$`spec1$spec_total`,xlab = "Specifics")

> barplot(socio1$n, names =socio1$`socio1$socio_total`,xlab = "Sociotropic")

> barplot(env1$n, names = env1$`env1$env_total`,xlab = "Environment")

> barplot(human1$n, names = human1$`human1$human_total`,xlab = "Human right")

> barplot(ideo1$n, names = ideo1$`ideo1$ideo_total`,xlab = "Ideology")

> barplot(ig1$n, names = ig1$`ig1$ig_total`,xlab = "Interest Groups")

> dev.off()
RStudioGD 
        2 
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureD9.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureE1.R @ 2025-12-09 08:18:39 


> library(ggplot2)

> library(dplyr)

> library(tidyr)

> average <- read.csv("01_data/average_10.csv")

> average2 <- read.csv("01_data/expert/average_10_v2.csv")

> comparison_df <- left_join(average, average2, by = "Coder", suffix = c("_old", "_new"))

> comparison_df$Coder <- factor(comparison_df$Coder,
+                               levels = comparison_df$Coder[order(comparison_df$Accuracy_average_old, decreasing = TRUE)])

> average$Source <- "Original"

> average2$Source <- "Expert Panel"

> combined_df <- bind_rows(average, average2)

> combined_df$Source <- factor(combined_df$Source, levels = c("Original", "Expert Panel"))

> combined_df$Coder <- factor(combined_df$Coder, levels = c(
+   "SFT GPT-3", "SFT GPT-4", "SFT Llama", "SFT GPT-3.5", "RoBERTa",
+   "Majority", "Majority(Mturk)", "Coder 3", "Coder 5",
+   "GPT-4 Zero-shot", "Coder 4", "Coder 1", "BOW Logit", "Coder 2", "Modal Class"))

> coder_colors <- c(
+   "SFT GPT-3" = "#1b9e77", "SFT GPT-4" = "#1b9e77", "SFT Llama" = "#1b9e77", "SFT GPT-3.5" = "#1b9e77",
+   "RoBERTa" = "#d95f02", "Majority" = "#d95f02", "Majority(Mturk)" = "#d95f02",
+   "Coder 3" = "#fc8d62", "Coder 5" = "#fc8d62", "Coder 4" = "#fc8d62", "Coder 1" = "#fc8d62", "Coder 2" = "#fc8d62",
+   "GPT-4 Zero-shot" = "#1b9e77", "BOW Logit" = "#7570b3", "Modal Class" = "#7570b3"
+ )

> p <-ggplot(combined_df, aes(x = Coder, y = Accuracy_average, group = Source, 
+                         color = Coder, linetype = Source, shape = Source)) +
+   geom_point(position = position_dodge(width = 0.4), size = 3) +
+   geom_errorbar(aes(ymin = lower, ymax = upper),
+                 position = position_dodge(width = 0.4),
+                 width = 0.1, size = 0.9) +
+   scale_color_manual(values = coder_colors, guide = "none") +
+   scale_linetype_manual(values = c("Original" = "solid", "Expert Panel" = "twodash")) +
+   scale_shape_manual(values = c("Original" = 16, "Expert Panel" = 1)) +  # ← shapes for points
+   labs(x = "", y = "Average Accuracy", linetype = "Dataset", shape = "Dataset") +
+   geom_hline(yintercept = 0.74760, linetype = "dotted", color = "gray40") +
+   theme_bw(base_size = 14) +
+   theme(
+     legend.position = "bottom",
+     legend.box.background = element_rect(color = "black", size = 0.5),
+     legend.title = element_text(face = "bold"),
+     legend.text = element_text(size = 12),
+     axis.text.x = element_text(size = 12),
+     panel.grid.major = element_blank(),
+     panel.grid.minor = element_blank(),
+     panel.border = element_blank(),
+     axis.line = element_line(color = "grey")
+   )

> ggsave("03_output/figureE1.pdf", p, width = 20, height = 10)
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureE1.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureE2.R @ 2025-12-09 08:18:39 


> library(dplyr)

> library(ggplot2)

> library(tidyr)

> library(stringr)

> expert2 <- read.csv("01_data/expert/expert2.csv")

> test <- read.csv("01_data/interestgroups_test.csv")

> test <- test %>% dplyr::select(-Security, -Tax)

> categories <- c("Prediction", "Status_quo", "Specifics", "Sociotropic",
+                 "Human_right", "Environment", "Ideology", "Interest_groups")

> test_matched <- test[match(expert2$Index, test$Index), ]

> accuracy_vector <- numeric(length(categories))

> for (i in seq_along(categories)) {
+   category <- categories[i]
+   accuracy_vector[i] <- mean(test_matched[[category]] == expert2[[category]], na.rm = TRUE)
+ }

> accuracy_results <- data.frame(
+   Category = categories,
+   Accuracy = accuracy_vector * 100 
+ )

> calc_f1 <- function(true, pred) {
+   tp <- sum(true == 1 & pred == 1, na.rm = TRUE)
+   fp <- sum(true == 0 & pred == 1, na.rm = TRUE)
+   fn <- sum(true == 1 & pred == 0, na.rm = TRUE)
+   
+   precision <- ifelse(tp + fp == 0, 0, tp / (tp + fp))
+   recall <- ifelse(tp + fn == 0, 0, tp / (tp + fn))
+   
+   if (precision + recall == 0) {
+     return(0)
+   } else {
+     return(2 * precision * recall / (precision + recall))
+   }
+ }

> f1_scores <- sapply(categories, function(cat) {
+   calc_f1(expert2[[cat]], test_matched[[cat]])
+ })

> f1_results <- data.frame(
+   Category = categories,
+   F1_Score = f1_scores * 100
+ )

> combined_results <- left_join(accuracy_results, f1_results, by = "Category") %>%
+   pivot_longer(cols = c(Accuracy, F1_Score),
+                names_to = "Metric",
+                values_to = "Score") %>%
+   mutate(
+     Category = factor(Category, levels = categories),
+     Category = str_replace_all(Category, "_", " "),
+     Metric = recode(Metric, F1_Score = "F1 score") 
+   )

> combined_results$Category <- factor(combined_results$Category,
+                                     levels = str_replace_all(categories, "_", " "))

> p <- ggplot(combined_results, aes(x = Category, y = Score, fill = Metric)) +
+   geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.6, color = "black") +
+   scale_fill_manual(values = c("Accuracy" = "#1b9e77", "F1 score" = "#d95f02")) +
+   labs(title = "",
+        x = NULL,
+        y = "Score (%)",
+        fill = "") +
+   theme_minimal(base_size = 14) +
+   theme(
+     axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
+     axis.text.y = element_text(size = 10),
+     axis.title.y = element_text(size = 12),
+     legend.position = "bottom",
+     legend.box.background = element_rect(color = "black", size = 0.6),
+     legend.background = element_rect(fill = "white", color = NA),
+     legend.text = element_text(size = 11),
+     legend.title = element_text(size = 14, face = "bold"),
+     panel.grid.major.x = element_blank()
+   )

> ggsave("03_output/figureE2.pdf", p, width = 20, height = 10)
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/figureE2.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableD2.R @ 2025-12-09 08:18:39 


> library(knitr)

> library(kableExtra)
Attaching package: ‘kableExtra’

The following object is masked from ‘package:dplyr’:

    group_rows

> df <- data.frame(
+   Category = c(
+     "Prediction", "Status-quo", "Specifics", "Sociotropics",
+     "Human/Labor Right", "Environment", "Ideology", "Interest Groups"
+   ),
+   Accuracy  = c(0.84, 0.82, 0.90, 0.85, 0.99, 0.99, 0.99, 0.92),
+   f1        = c(0.529, 0.563, 0.545, 0.694, 0.888, 0.933, 0.963, 0.852),
+   Recall    = c(0.643, 0.667, 0.462, 0.607, 0.800, 1.000, 1.000, 0.958),
+   Precision = c(0.450, 0.486, 0.667, 0.809, 1.000, 0.875, 0.928, 0.767)
+ )

> latex_table <- 
+   kable(df, "latex", booktabs = TRUE,
+         caption = "Majority of Upworkers and Supervised Fine-tuned GPT-3 (US Chamber of Commerce, Press release)",
+         label = "tab:chamberupwork",
+         align = c("l","c","c","c","c")) %>%
+   kable_styling(latex_options = c("hold_position"))

> cat(latex_table, file = "03_output/tableD2.tex")
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableD2.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableD3.R @ 2025-12-09 08:18:39 


> library(knitr)

> library(kableExtra)

> df2 <- data.frame(
+   Category = c(
+     "Prediction",
+     "Status-quo",
+     "Specifics",
+     "Sociotropics",
+     "Human/Labor Right",
+     "Environment",
+     "Ideology",
+     "Interest Groups"
+   ),
+   Accuracy  = c(0.89, 0.83, 0.85, 0.89, 0.99, 0.95, 0.95, 0.89),
+   f1        = c(0.78, 0.738, 0.571, 0.841, NA, NA, 0.545, 0.887),
+   Recall    = c(0.824, 0.857, 0.588, 0.879, NA, 0, 1, 0.956),
+   Precision = c(0.636, 0.649, 0.555, 0.806, NA, 0, 0.375, 0.827)
+ )

> latex_table <-  kable(df2, "latex", booktabs = TRUE,
+       caption = "Majority of Upworkers and Supervised Fine-tuned GPT-3 (US Chamber of Commerce, Letters to Congress)",
+       label = "tab:chamberupwork2",
+       align = c("l","c","c","c","c")) %>%
+   kable_styling(latex_options = c("hold_position")) %>%
+   column_spec(1, width = "5cm") %>%
+   column_spec(2:5, width = "2.2cm")

> cat(latex_table, file = "03_output/tableD3.tex")
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableD3.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableD4.R @ 2025-12-09 08:18:39 


> library(knitr)

> library(kableExtra)

> df3 <- data.frame(
+   Category = c(
+     "Prediction",
+     "Status-quo",
+     "Specifics",
+     "Sociotropics",
+     "Human/Labor Right",
+     "Environment",
+     "Ideology",
+     "Interest Groups"
+   ),
+   Accuracy  = c(0.877, 0.755, 0.887, 0.952, 0.943, 0.943, 0.991, 0.981),
+   f1        = c(0.592, 0.552, 0.647, 0.872, 0.800, 0.667, 0.960, NA),
+   Recall    = c(0.888, 0.842, 0.579, 0.809, 0.667, 0.667, 1.000, NA),
+   Precision = c(0.444, 0.410, 0.733, 0.944, 1.000, 0.667, 0.923, NA)
+ )

> latex_table <- kable(df3, "latex", booktabs = TRUE,
+       caption = "Majority of Upworkers and Supervised Fine-tuned GPT-3 (USTR, Press release)",
+       label = "tab:ustrupwork1",
+       align = c("l","c","c","c","c")) %>%
+   kable_styling(latex_options = c("hold_position")) %>%
+   column_spec(1, width = "5cm") %>%
+   column_spec(2:5, width = "2.2cm")

> cat(latex_table, file = "03_output/tableD4.tex")
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableD4.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableD5.R @ 2025-12-09 08:18:39 


> library(knitr)

> library(kableExtra)

> df_hearings <- data.frame(
+   Category = c(
+     "Prediction",
+     "Status-quo",
+     "Specifics",
+     "Sociotropics",
+     "Human/Labor Right",
+     "Environment",
+     "Ideology",
+     "Interest Groups"
+   ),
+   Accuracy  = c(0.877, 0.802, 0.924, 0.811, 0.99, 0.99, 0.94, 1.00),
+   f1        = c(0.518, 0.533, 0.636, 0.68, NA, NA, 0.86, NA),
+   Recall    = c(0.875, 0.857, 0.636, 0.56, NA, NA, 1.00, NA),
+   Precision = c(0.368, 0.387, 0.636, 0.88, NA, NA, 0.76, NA)
+ )

> latex_table <- kable(df_hearings, "latex", booktabs = TRUE,
+       caption = "Majority of Upworkers and Supervised Fine-tuned GPT-3 (USTR, Congressional Hearings)",
+       label = "tab:ustrupwork2",
+       align = c("l","c","c","c","c")) %>%
+   kable_styling(latex_options = c("hold_position")) %>%
+   column_spec(1, width = "5cm") %>%
+   column_spec(2:5, width = "2.2cm")

> cat(latex_table, file = "03_output/tableD5.tex")
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableD5.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableE1.R @ 2025-12-09 08:18:39 


> library(dplyr)

> library(purrr)
Attaching package: ‘purrr’

The following object is masked from ‘package:plyr’:

    compact

The following object is masked from ‘package:caret’:

    lift

> library(tidyr)

> library(irr)    
Loading required package: lpSolve

> library(knitr)

> library(kableExtra)

> library(scales)
Attaching package: ‘scales’

The following object is masked from ‘package:purrr’:

    discard

> expert2 <- read.csv("01_data/expert/expert2.csv")

> test <- read.csv("01_data/interestgroups_test.csv")

> test <- test %>% dplyr::select(-Security, -Tax)

> cats <- c("Prediction","Status_quo","Specifics","Sociotropic",
+           "Human_right","Environment","Ideology","Interest_groups")

> dat <- expert2 %>%
+   inner_join(test, by = "Index", suffix = c(".exp", ".test"))

> reliability_one <- function(cat){
+   x <- dat[[paste0(cat, ".exp")]]
+   y <- dat[[paste0(cat, ".test")]]
+   
+   keep <- complete.cases(x, y)
+   x <- x[keep]
+   y <- y[keep]
+   
+   tibble(
+     category = cat,
+     n_units = length(x),
+     percent_agreement = mean(x == y),
+     cohen_kappa = irr::kappa2(data.frame(x, y), weight = "unweighted")$value
+   )
+ }

> results <- map_dfr(cats, reliability_one)

> results
# A tibble: 8 × 4
  category        n_units percent_agreement cohen_kappa
  <chr>             <int>             <dbl>       <dbl>
1 Prediction          202             0.941       0.869
2 Status_quo          202             0.946       0.882
3 Specifics           202             0.975       0.938
4 Sociotropic         202             0.955       0.888
5 Human_right         202             0.975       0.895
6 Environment         202             1           1    
7 Ideology            202             0.980       0.935
8 Interest_groups     202             0.916       0.824

> cats <- c("Prediction","Status_quo","Specifics","Sociotropic",
+           "Human_right","Environment","Ideology","Interest_groups")

> dat <- expert2 %>%
+   inner_join(test, by = "Index", suffix = c(".exp", ".test"))

> long <- map_dfr(
+   cats,
+   ~ tibble(
+     category = .x,
+     exp  = dat[[paste0(.x, ".exp")]],
+     test = dat[[paste0(.x, ".test")]]
+   )
+ ) %>%
+   mutate(
+     exp  = as.factor(as.character(exp)),
+     test = as.factor(as.character(test))
+   )

> confusion <- with(long, table(exp, test))

> confusion
   test
exp    0    1
  0 1125   19
  1   44  428

> # ============================================================================
> #                           LaTeX TABLES
> # ============================================================================
> 
> 
> results_print <- results %>%
+   mutate(
+     percent_agreement = scales::percent(percent_agreement, accuracy = 0.1),
+     cohen_kappa = sprintf("%.3f", cohen_kappa)
+   ) %>%
+   select(
+     `Category` = category,
+     `N` = n_units,
+     `Percent agreement` = percent_agreement,
+     `Cohen's \\(\\kappa\\)` = cohen_kappa
+   )

> irr_caption <- "Inter-coder reliability by category (Original vs. Expert Panel)."

> tab_results <- kable(
+   results_print,
+   format   = "latex",
+   booktabs = TRUE,
+   caption  = irr_caption,
+   label    = "tab:irr",
+   align    = c("l","r","r","r"),
+   escape   = FALSE
+ ) %>%
+   kable_styling(latex_options = c("hold_position"))

> cat(tab_results, file = "03_output/tableE1.tex")
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableE1.R 

>>> Running: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableE2.R @ 2025-12-09 08:18:40 


> library(dplyr)

> library(purrr)

> library(tidyr)

> library(irr)    

> library(knitr)

> library(kableExtra)

> library(scales)

> expert2 <- read.csv("01_data/expert/expert2.csv")

> test <- read.csv("01_data/interestgroups_test.csv")

> test <- test %>% dplyr::select(-Security, -Tax)

> cats <- c("Prediction","Status_quo","Specifics","Sociotropic",
+           "Human_right","Environment","Ideology","Interest_groups")

> dat <- expert2 %>%
+   inner_join(test, by = "Index", suffix = c(".exp", ".test"))

> reliability_one <- function(cat){
+   x <- dat[[paste0(cat, ".exp")]]
+   y <- dat[[paste0(cat, ".test")]]
+   
+   keep <- complete.cases(x, y)
+   x <- x[keep]
+   y <- y[keep]
+   
+   tibble(
+     category = cat,
+     n_units = length(x),
+     percent_agreement = mean(x == y),
+     cohen_kappa = irr::kappa2(data.frame(x, y), weight = "unweighted")$value
+   )
+ }

> results <- map_dfr(cats, reliability_one)

> results
# A tibble: 8 × 4
  category        n_units percent_agreement cohen_kappa
  <chr>             <int>             <dbl>       <dbl>
1 Prediction          202             0.941       0.869
2 Status_quo          202             0.946       0.882
3 Specifics           202             0.975       0.938
4 Sociotropic         202             0.955       0.888
5 Human_right         202             0.975       0.895
6 Environment         202             1           1    
7 Ideology            202             0.980       0.935
8 Interest_groups     202             0.916       0.824

> cats <- c("Prediction","Status_quo","Specifics","Sociotropic",
+           "Human_right","Environment","Ideology","Interest_groups")

> dat <- expert2 %>%
+   inner_join(test, by = "Index", suffix = c(".exp", ".test"))

> long <- map_dfr(
+   cats,
+   ~ tibble(
+     category = .x,
+     exp  = dat[[paste0(.x, ".exp")]],
+     test = dat[[paste0(.x, ".test")]]
+   )
+ ) %>%
+   mutate(
+     exp  = as.factor(as.character(exp)),
+     test = as.factor(as.character(test))
+   )

> confusion <- with(long, table(exp, test))

> confusion
   test
exp    0    1
  0 1125   19
  1   44  428

> overall_pa    <- mean(long$exp == long$test)

> overall_kappa <- irr::kappa2(data.frame(long$exp, long$test), weight = "unweighted")$value

> conf_with_totals <- addmargins(confusion)

> conf_df <- as.data.frame.matrix(conf_with_totals)

> rownames(conf_df) <- paste0("Original: ", rownames(conf_df))

> colnames(conf_df) <- paste0("Expert Panel: ", colnames(conf_df))

> rn <- rownames(conf_df); cn <- colnames(conf_df)

> rn[length(rn)] <- "Total"

> cn[length(cn)] <- "Total"

> rownames(conf_df) <- rn

> colnames(conf_df) <- cn

> conf_caption <- sprintf(
+   "Aggregated confusion matrix across all categories (Original vs. Expert Panel). Overall agreement: %s; Cohen's \\(\\kappa\\): %.3f.",
+   scales::percent(overall_pa, accuracy = 0.1), overall_kappa
+ )

> tab_conf <- kable(
+   conf_df,
+   format   = "latex",
+   booktabs = TRUE,
+   caption  = conf_caption,
+   label    = "tab:confusion",
+   align    = "r",
+   escape   = FALSE
+ ) %>%
+   kable_styling(latex_options = c("hold_position"))

> cat(tab_conf, file = "03_output/tableE2.tex")
<<< Completed: /Users/dahyunc/Princeton Dropbox/Choi Dahyun/PSRM_llm_replication_dataverse_final/02_code/tableE2.R 
There were 15 warnings (use warnings() to see them)

=== Replication finished at: 2025-12-09 08:18:40 ===
